//clean complaint db received from SLBFE

clear
set memory 500m
set matsize 100
set more off 
# delimit;


* Set directory; 

cd "~/Desktop/migrec_replication/do/";

** STEP 1: How do I figure out what the complaints are about? ;

** clean complaint codes and prepare for merge; 

insheet using "../raw/Nature.csv", comma clear;

renpfix "" nat_;

gen trouble1 = nat_code; 

rename nat_nature problem_desc;

label var problem_desc "Desription of Problem";

save ../dta/nature-codes.dta, replace; 

** Step 2: Open and clean complaints data; 


insheet using ../raw/Complain.csv, comma clear; 


merge m:1 trouble1 using ../dta/nature-codes.dta, force gen(trouble1merge);

keep if trouble1merge != 2; 


*D-sector contains the general coding of the problem ; 


gen problem_type = nat_d_sector; 

replace problem_type = "Harassment" if problem_type == "H"; 
replace problem_type = "Non-payment of wages" if problem_type == "N"; 
replace problem_type = "Breach of contract" if problem_type == "B";
replace problem_type = "Lack of Communications" if problem_type == "L";
replace problem_type = "Death" if problem_type == "D"; 
replace problem_type = "Stranded workers" if problem_type == "S"; 
replace problem_type = "Others" if problem_type == "O"; 
replace problem_type = "Employee missing" if problem_type == "M";  


save ../dta/complaints.dta, replace;



** STEP 3: Do I know when the complaint was made? ; 


* YES, it's cmpl_date; 


gen cmpl_yr = substr(cmpl_date, -7, 2) ;
replace cmpl_yr = "20" + cmpl_yr;




gen cmpl_month = substr(cmpl_date,1,2); 
gen rep_m = substr(cmpl_date,1,1); 
replace cmpl_month = rep_m if substr(cmpl_month,-1,1) == "/" ;
drop rep_m; 



/*
gen cmpl_month1 = substr(cmpl_date, -13, 2) ;
gen cmpl_month2 = substr(cmpl_date, -12, 1) ;

*/

*STEP 3.5: When did the incident actually happen? ;

gen inci_yr = substr(dateofi, -7, 2); 
replace inci_yr = "20" + inci_yr;



gen inci_month = substr(dateofi,1,2); 
gen rep_m = substr(dateofi,1,1); 
replace inci_month = rep_m if substr(inci_month,-1,1) == "/" ;

drop rep_m; 



save ../dta/complaints.dta, replace;



** STEP 4: Do I know who the complain was made about? ; 

* YES, it's 'an'; 


** STEP 5: Do I know which country? ; 

insheet using ../raw/Country.csv, comma clear; 

rename finalapp_emb_crt_unskilledfemale finalapp_unskilledfemale;

renpfix "" cn_ ;

gen zeros = ""; 

replace zeros = "00" if cn_code > 0 & cn_code <= 9; 
replace zero = "0" if cn_code > 9 & cn_code < 100; 

tostring cn_code, replace; 

replace cn_code = zeros + cn_code; 



save ../dta/country.dta, replace;

use ../dta/complaints.dta, clear; 

rename cn cn_code;

merge m:1 cn_code using ../dta/country.dta, gen(country_code) force;


keep if country_code != 2; 

save ../dta/complaints.dta, replace; 


** STEP 6: Do I know where the complainant came from in SL?;


insheet using ../raw/villeges.csv, comma clear;


renpfix "" vil_ ;

rename vil_code towncode;

save ../dta/villages.dta, replace;


use ../dta/complaints.dta, clear; 


merge m:1 towncode using ../dta/villages.dta, gen(townmerge);

keep if townmerge != 2; 


save ../dta/complaints.dta, replace; 


** STEP 7: Which local agents do these complaints correspond to? ;


insheet using "../raw/Local Agency.csv", comma clear;

renpfix "" agency_;


rename agency_lno an; 


save ../dta/agencies.dta, replace; 

use ../dta/complaints.dta, clear; 


merge m:1 an using ../dta/agencies, gen(agencymerge) force ;


keep if agencymerge != 2; 

save ../dta/complaints.dta, replace; 



** STEP 8: Which sectors do people work in?;


insheet using "../raw/Job Unit.csv", comma clear ; 

renpfix "" jobcode_ ;

rename jobcode_code job_code;

save ../dta/job_codes.dta, replace; 


use ../dta/complaints.dta, clear; 

rename jobcate job_code; 

merge m:1 job_code using ../dta/job_codes, gen(jobcodemerge); 

keep if jobcodemerge!= 2; 


** ADD DISTRICT DATA; 

merge m:1 vil_dis_code using ../dta/districts.dta, gen(district_merge); 



save ../dta/complaints.dta, replace; 



















